Librairies utilisées

library(ggplot2)
library(ggthemes)
library(lattice)
library(plot3D)
library(rriskDistributions)
library(stringr)
require(MASS)
## Loading required package: MASS

##Indicateur de temps

time.ini = Sys.time()

Fonctions utilisées

Fonction de lecture des stats résumées. La fonction est prévue pour lire les fichiers de stats résumées dans des répertoires de la forme “Ne100/simu_1” (en modifiant les valeurs de Ne et de simulations). Elle renvoie les résultats sous forme de data frame en ajoutant aux statistiques résumées les colonnes correspondant à l’effectif efficace et la simulation.

#Data frame construction
data.frame.stat = function(seq_ne, max_gen, max_simu){

  #Créer matrice vide qui contiendra les stats
  mat_stat = matrix(data = NA, nrow = length(seq_ne)*max_gen*max_simu, ncol = 64)
  mat_stat = as.data.frame(mat_stat)
  #Compteur pour indiquer les lignes à remplir
  cpt = 0

  for (ne in seq_ne) {
    if (dir.exists(str_c("Ne", toString(ne), "/"))) {
      dir_ne = str_c("Ne", toString(ne), "/") #Répertoire de taille effective
      row_min = cpt*max_gen*max_simu+1 #Ligne min. où on écrit les stats pour le Ne donné
      row_max = (cpt+1)*max_gen*max_simu         #Ligne max. où on écrit les stats  
      mat_stat[row_min:row_max,1] = toString(ne) #Ecriture du Ne correspondant pour les stats qui seront écrites
      cpt = cpt+1                             #Incrémentation du compteur
  
      for (nb_simu in 1:max_simu) {
        dir_simu = str_c("simu_", toString(nb_simu), "/")
        row_min_simu = (nb_simu-1)*max_gen + row_min
        row_max_simu = nb_simu*max_gen + row_min -1
        mat_stat[row_min_simu:row_max_simu,2] = nb_simu
        file_path = str_c(dir_ne, dir_simu, "final_sumstats.txt")
        file_stat = read.table(file_path, header = TRUE) #Lecture fichier stat résumées
        #Suppression 1ère colonne, contenant uniquement chiffres pour tri en bash. 
        mat_stat[row_min_simu:row_max_simu,3:63] = file_stat
        mat_stat[row_min_simu:row_max_simu,64] = rep((cpt-1), max_gen)
      }
    }
  }
  

  #Attribution nom colonnes selon appelation stat par MetHis
  colnames(mat_stat) = c("Ne", "simu", names(file_stat),"cpt")
  #Passage des Ne en facteur
  mat_stat$Ne = factor(as.factor(mat_stat$Ne), levels = as.character(seq_ne))
  #Passage simulations en entier
  mat_stat$simu = as.integer(mat_stat$simu)
  #Passage générations en entier
  mat_stat$Generation = as.integer(mat_stat$Generation)
  #Passage compteur en facteur
  mat_stat$cpt = as.integer(mat_stat$cpt)  
  #Passage stats en double
  for (numcol in 4:ncol(mat_stat)) {
    mat_stat[,numcol] = as.double(mat_stat[,numcol])
  }
  
  return(mat_stat)
}

Dans le cas où l’on travaille sur de très nombreuses simulations correspondant à un effectif efficace, cette fonction permet de calculer les statistiques moyennées pour les valeurs de Ne étudiées.

#Passage en moyenne
data.frame.mean = function(df_stat, max_gen, seq_ne){
  
  lrow = length(seq_ne)
  df_mean = matrix(data = NA, nrow = lrow*max_gen, ncol = 62 )
  df_mean = as.data.frame(df_mean)
  cpt = 0

  for (ne in seq_ne) {
    df_tmp = df_stat[which(df_stat$Ne == ne),]
    
    row_min = cpt*max_gen+1
    row_max = (cpt+1)*max_gen
    df_mean[row_min:row_max, 1] = toString(ne) 
    cpt = cpt+1
    
    for (gen in 0:(max_gen-1) ) {
      df_mean[row_min+gen, 2] = gen
      vec_tmp = apply(df_tmp[which(df_tmp$Generation == gen), 4:63], 2, mean)
      df_mean[row_min+gen, 3:62] = vec_tmp
    }
  }
  
  colnames(df_mean) = colnames(df_stat)[-2]
  df_mean$Ne = factor(as.factor(df_mean$Ne), levels = seq_ne )
  return(df_mean)
}

Fonction d’affichage des graphiques en 2D, avec un background blanc et les lignes horizontales de quadrillage affichées. Cette fonction permet d’afficher les lignes de régression ou les lignes reliant les points.

#Plot function
#Affichage d'une stat au cours des générations
plot_stat_gen = function(df, gen, stat, group_col = c(), color_col, titre, ligne = TRUE, legd = TRUE,
                         size_point = 0.1, line_t = "solid"){
  p = ggplot(df, aes(x = gen, y = stat,
                     group = group_col, color = color_col)) + ggtitle(titre)
  
  if (ligne) {
    p = p + geom_point(size = size_point, show.legend = legd)+
      geom_line(aes(linetype = line_t), show.legend = legd)
  }else{
    #smooth de la courbe pour obtenir régression et tempérer variabilité due au TCL
    p = p + geom_point(size = size_point, show.legend = legd)+ geom_smooth(show.legend = legd)
  }
  p = p + theme_classic()
  p = p + theme(panel.grid.major.y = element_line(size = 0.5,
                                                  linetype = 'solid',
                                                  colour = "#BABABA"),
                panel.grid.minor.y = element_line(size = 0.25,
                                                  linetype = 'solid',
                                                  colour = "#CECECE"))
  
  return(p)
}

Cette fonction d’affichage de graphique reprend la précédente, mais sans affficher les points.

#Plot function
#Affichage d'une stat au cours des générations
plot_without_point = function(df, gen, stat, color_col, titre, legd = TRUE){
  p = ggplot(df, aes(x = gen, y = stat, color = color_col)) + ggtitle(titre)
  
    #smooth de la courbe pour obtenir régression et tempérer variabilité due au TCL
  p = p + geom_smooth(show.legend = legd)
  p = p + theme_classic()
  p = p + theme(panel.grid.major.y = element_line(size = 0.5,
                                                  linetype = 'solid',
                                                  colour = "#BABABA"),
                panel.grid.minor.y = element_line(size = 0.25,
                                                  linetype = 'solid',
                                                  colour = "#CECECE"))
  #print(p)
  return(p)
}
improve_plot_bottle = function(p, vec_abline, lab_col, lab_line, vec_linetype){
  for (rg_abl in 1:length(vec_abline)) {
    p = p + geom_vline(xintercept = vec_abline[rg_abl], size = 0.4, color = "orange", linetype = "solid")
  }
  p = p + labs(color = lab_col, linetype = lab_line) + scale_linetype_manual(values=vec_linetype)
  return(p)
}

Fonction d’extraction d’une sous-matrice à partir d’une matrice principale en fonction de valeurs choisies de Ne.

extract_sub_mat = function(all_mat, list_ne){
  all_rows = c()
  for (ne in as.character(list_ne)) {
    all_rows = append(all_rows, which(all_mat[,1] == ne))
  }
  return(all_mat[all_rows,])
}

###Enregistrement des fonctions

print(difftime(Sys.time(), time.ini))
## Time difference of 0.08071566 secs

Population constante de tailles initiales différentes

Calcul des matrices de stats

seq_1024_16384 = 2**seq(10,14,1)
seq_50_1000 = seq(50,1000,1)
seq_tot = sort(c(seq_50_1000, seq_1024_16384))

mat_tot = data.frame.stat(seq_tot, 101, 1)
mat_50_1000 = extract_sub_mat(mat_tot, c(seq(50,200,10), seq(200,1000,100)))
mat_1024_16384 = extract_sub_mat(mat_tot, 2**seq(6,14,1))
mat_64_100_16384 = extract_sub_mat(mat_tot, c(2**seq(6,14,1), 100, 1000) )

#mat_1024_16384_mean = data.frame.mean(mat_1024_16384, 101, seq_1024_16384)

Graphiques obtenus : affichage de l’évolution d’une statistique donnée en fonction des générations colorée selon la Ne initiale.

p_tmp = plot_stat_gen(mat_tot, mat_tot$Generation, 
                      mat_tot$mean.het.adm, mat_tot$Ne, mat_tot$Ne,ligne = T,
                      "Moyenne htz en fonction des générations\n selon différentes Ne initiales",
                      legd = FALSE)
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s1[1], color = "red")
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s2[1], color = "black")
print(p_tmp)

p_tmp = plot_stat_gen(mat_50_1000, mat_50_1000$Generation, 
                      mat_50_1000$mean.het.adm, mat_50_1000$Ne, mat_50_1000$Ne,ligne = T,
                      "Moyenne htz en fonction des générations\nselon différentes Ne initiales",
                      legd = TRUE)
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s1[1], color = "red")
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s2[1], color = "black")
print(p_tmp)

write_cstt_plot = function(name_stat, mat, name_file,min_y, max_y){
  num_col = which(colnames(mat) == name_stat)
  p = plot_stat_gen(mat, mat$Generation, 
                mat[,num_col], mat$Ne, mat$Ne,ligne = T,
                str_c(name_stat, "en fonction des générations\n selon différentes Ne initiales"))
  png(filename = str_c("../../../Images/", name_file, ".png"), width = 1000, height = 800)
  print(p+ylim(min_y, max_y)+ labs(color = "Ne") + guides(linetype = FALSE))
  dev.off()
  ggsave(filename =  str_c("../../../Images/", name_file, ".eps"), plot = p)
}
write_cstt_plot("mean.het.adm", mat_64_100_16384, "moyenne_heterozygotie/moy_htz_ne_cst", 0.01, 0.062)
## Saving 7 x 5 in image
write_cstt_plot("Fst.s1.adm", mat_64_100_16384, "Fsts1adm/fst_s1_adm_ne_cst", -0.01, 0.5)
## Saving 7 x 5 in image
write_cstt_plot("mean.F.adm", mat_64_100_16384, "Fadm/f_adm_ne_cst", -0.03, 0.03)
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Saving 7 x 5 in image
plot_stat_gen(mat_tot, mat_tot$Generation, 
              mat_tot$Fst.s1.adm, mat_tot$Ne,mat_tot$Ne,
              "Fst s1 adm en fonction des générations\n selon différentes Ne initiales",
              legd = FALSE)

plot_stat_gen(mat_50_1000, mat_50_1000$Generation, 
              mat_50_1000$Fst.s1.adm, mat_50_1000$Ne,mat_50_1000$Ne,
              "Fst s1 adm en fonction des générations\n selon différentes Ne initiales")

plot_stat_gen(mat_1024_16384, mat_1024_16384$Generation, 
              mat_1024_16384$Fst.s1.adm, mat_1024_16384$Ne,mat_1024_16384$Ne,ligne = T,
              "Fst s1 adm en fonction des générations\n selon différentes Ne initiales")

###Populations à Ne constants

print(difftime(Sys.time(), time.ini))
## Time difference of 2.089165 mins

Populations croissantes

Réutilisation de la fonction de lecture de stats résumées pour des populations croissantes. Celles-ci sont enregistrées dans des répertoires de la forme “Ne100-XXX/Ne100-1000/simu_1” Fonction de lecture des stats résumées. La fonction est prévue pour lire les fichiers de stats résumées dans des répertoires de la forme “NeXXX/simu_XXX” (avec XXX remplacés par les valeurs de Ne et de simulations). Elle renvoie les résultats sous forme de data frame en ajoutant aux statistiques résumées les colonnes correspondant à l’effectif efficace et la simulation.

data.frame.increase = function(seq_ne_ini, seq_combi){
  for (ne_ini in seq_ne_ini) {
    if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
      dir_ne = str_c("Ne", ne_ini, "-XXX/")
      setwd(dir_ne)
      motif_detect = str_c("^",ne_ini,"-")
      vec_ne_tmp = seq_combi[which(str_detect(seq_combi, motif_detect))]
      mat_tmp = data.frame.stat(seq_ne = vec_ne_tmp, max_gen = 101, max_simu = 1)
      if (ne_ini == seq_ne_ini[1]) {
        mat_pop_inc = mat_tmp
      }else{
        mat_pop_inc = rbind(mat_pop_inc, mat_tmp)
      }
      setwd("../")
    }
  }
  return(na.omit(mat_pop_inc))
}

Lecture des data frame pour les combinaison de Ne entre ne0 (ne_ini) et nef (ne_fin). On extrait ensuite les stats pour lesquelles on a un nef de 10000.

setwd("../new_methis_pop_increase_50000_snp/")
seq_ne_ini = seq(50,100,2)
seq_ne_fin = seq(100, 10000, 100)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]

mat_pop_inc = data.frame.increase(seq_ne_ini, seq_combi)


mat_end_10000 = extract_sub_mat(mat_pop_inc,
                                as.data.frame(outer(seq(50,100,4),
                                                    c("10000"),
                                                    FUN="paste",
                                                    sep="-"))[,1])
plot_stat_gen(mat_pop_inc, mat_pop_inc$Generation, 
              mat_pop_inc$f3, mat_pop_inc$Ne,mat_pop_inc$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              FALSE, legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_pop_inc, mat_pop_inc$Generation, 
              mat_pop_inc$mean.het.adm, mat_pop_inc$Ne,mat_pop_inc$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              FALSE, legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_end_10000, mat_end_10000$Generation, 
              mat_end_10000$mean.het.adm, mat_end_10000$Ne,mat_end_10000$Ne,
              "Moyenne hétérozygotie en fonction des générations\n selon différentes Ne initiales",
              legd = TRUE)

plot_stat_gen(mat_end_10000, mat_end_10000$Generation, 
              mat_end_10000$f3, mat_end_10000$Ne,mat_end_10000$Ne,
              "F3 en fonction des générations\n selon différentes Ne initiales",
              legd = TRUE)

###Populations croissantes à Nu aléatoire dans la loi uniforme [0,0.5]

print(difftime(Sys.time(), time.ini))
## Time difference of 5.682296 mins

Variation Nu

setwd("../new_methis_pop_increase_50000_snp_Nu_var/")
seq_nu = sprintf("%.2f", seq(0.05, 0.45, 0.05))
seq_ne_ini = seq(50,100,10)
seq_ne_fin = seq(100, 10000, 200)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]

list_df_nu = list()
cpt = 1

for (nu in seq_nu) {
  if(dir.exists(str_c("Nu", nu ))){
    dir_nu = str_c("Nu", nu, "/")
    setwd(dir_nu)
    list_df_nu[[cpt]] = data.frame.increase(seq_ne_ini, seq_combi)
    cpt = cpt+1
    setwd("../")
  }
}

for (combi_lst in 1:length(list_df_nu)) {
  if (combi_lst == 1) {
    mat_pop_nu = list_df_nu[[combi_lst]]
    mat_pop_nu = data.frame(mat_pop_nu, Nu = seq_nu[combi_lst])
  }else{
    mat_tmp = list_df_nu[[combi_lst]]
    mat_tmp = data.frame(mat_tmp, Nu = seq_nu[combi_lst])
    mat_pop_nu = rbind(mat_pop_nu, mat_tmp)
  }
}

mat_pop_nu$Nu = as.factor(mat_pop_nu$Nu)
plot_without_point(mat_pop_nu, mat_pop_nu$Generation, 
                   mat_pop_nu$mean.het.adm, mat_pop_nu$Ne,
                   "Moyenne hétérozygotie en fonction des générations\n selon différentes Ne initiales\n et couleurs selon nu",
                   legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_without_point(mat_pop_nu, mat_pop_nu$Generation, 
                   mat_pop_nu$mean.het.adm, mat_pop_nu$Nu,
                   "Moyenne hétérozygotie en fonction des générations\n selon différentes Ne initiales\n et couleurs selon nu",
                   legd = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation,
#              mat_pop_nu$mean.het.adm, mat_pop_nu$Ne,
#              "Stat en fonction des générations\n selon différentes Ne initiales",
#              TRUE, legd = FALSE)
# plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation,
#              mat_pop_nu$mean.het.adm, mat_pop_nu$Nu,
#              "Stat en fonction des générations\n selon différentes Ne initiales",
#              TRUE, legd = FALSE)
for (rg_list in 1:length(list_df_nu)) {
  p_tmp = plot_stat_gen(list_df_nu[[rg_list]], list_df_nu[[rg_list]]$Generation, 
                        list_df_nu[[rg_list]]$mean.het.adm, list_df_nu[[rg_list]]$Ne,
                        list_df_nu[[rg_list]]$Ne,
                        str_c("Nu = ",seq_nu[rg_list]), TRUE,legd = FALSE)
  p_tmp = p_tmp+geom_hline(yintercept = list_df_nu[[rg_list]]$mean.het.s1[1],
                           color = "red")
  p_tmp = p_tmp+geom_hline(yintercept = list_df_nu[[rg_list]]$mean.het.s2[1],
                           color = "black")
  print(p_tmp)

}

for (rg_list in 1:length(list_df_nu)) {
  
  list_ne = list_df_nu[[rg_list]]$Ne[which(list_df_nu[[rg_list]]$Generation == 100)]
  list_ne = as.integer(unlist(str_split(list_ne, "-")))
  list_ne0 = list_ne[seq(1, length(list_ne), 2)]
  list_nef = list_ne[seq(2, length(list_ne), 2)]
  mat_tmp_u = list_df_nu[[rg_list]][which(list_df_nu[[rg_list]]$Generation == 100),]
  
  scatter3D(x = list_ne0, y = list_nef, z = mat_tmp_u$mean.het.adm,
            bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_list],
            theta = 120, phi = 0, type = "s", cex.lab = 0.6, cex.axis = 0.5,
            xlab = "Ne0", ylab = "Nef", zlab = "Stat")

}

# grid.lines = 50
# fit <- loess(mat_tmp_u$mean.het.adm ~ list_nef + list_ne0)
# x.pred <- seq(min(list_ne0), max(list_ne0), length.out = grid.lines)
# y.pred <- seq(min(list_nef), max(list_nef), length.out = grid.lines)
# xy <- expand.grid( x = x.pred, y = y.pred)
# z.pred <- matrix(predict(fit, newdata = xy), 
#                  nrow = grid.lines, ncol = grid.lines)
# 
# fitpoints = predict(fit)
# scatter3D(x = list_ne0, y = list_nef, z = mat_tmp_u$mean.het.adm,
#           bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_list],
#           theta = 150, phi = 0, type = "s",cex = 0.5, cex.lab = 0.6, cex.axis = 0.5,
#           xlab = "Ne0", ylab = "Nef", zlab = "Stat",
#           surf = list(x = x.pred, y = y.pred, z = z.pred,
#                       facets = NA))
for (rg_list in 1:length(list_df_nu)) {
  
  list_ne = list_df_nu[[rg_list]]$Ne[which(list_df_nu[[rg_list]]$Generation == 100)]
  list_ne = as.integer(unlist(str_split(list_ne, "-")))
  list_ne0 = list_ne[seq(1, length(list_ne), 2)]
  list_nef = list_ne[seq(2, length(list_ne), 2)]
  mat_tmp_u = list_df_nu[[rg_list]][which(list_df_nu[[rg_list]]$Generation == 100),]
  
  print(wireframe(mat_tmp_u$mean.het.adm ~ list_ne0 * list_nef, data = mat_tmp_u,
                  drape = TRUE, zlab = "Htz", xlab = "Ne0", ylab = "Nef",
                  screen = list(z = 40, x = -60),
                  scales = list(z.ticks=5, arrows=F, col="black", font=1, tck=0.8),
                  main = seq_nu[rg_list]))
  
}

Calcul matrice pour connaître Ne à chaque génération.

setwd("../new_methis_pop_increase_50000_snp_Nu_var/")

mat_ne_inc = data.frame()
cpt = 1

for (nu in seq_nu) {
  if(dir.exists(str_c("Nu", nu ))){
    dir_nu = str_c("Nu", nu, "/")
    setwd(dir_nu)
    for (ne_ini in seq_ne_ini) {
      if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
        dir_ne = str_c("Ne", ne_ini, "-XXX/")
        setwd(dir_ne)
        seq_combi = as.data.frame(outer(ne_ini, seq_ne_fin,
                                        FUN="paste", sep="-"))
        seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
        for (combi in seq_combi) {
          if (dir.exists(str_c("Ne", combi))) {
            dir_ne_bis = str_c("Ne", combi, "/simu_1/")
            setwd(dir_ne_bis)
            cpt = cpt + 1
            file_tmp = read.table("simu_1.par", header = TRUE)
            if (nu == seq_nu[1] && ne_ini==seq_ne_ini[1] && combi == seq_combi[1]) {
              mat_ne_inc = as.data.frame(file_tmp[,2])
            }else{
              
              mat_ne_inc = cbind(mat_ne_inc, file_tmp[,2])
            }
            
            setwd("../../")
          }
        }
        setwd("../")
      }
    }
    setwd("../")

  }
}
mat_pop_nu = data.frame(mat_pop_nu,
                        ne_gen = unlist(mat_ne_inc , use.names = F),
                        n0 = rep(NA, nrow(mat_pop_nu)),
                        nf = rep(NA, nrow(mat_pop_nu)) )

for (i in 1:nrow(mat_pop_nu)) {
  vec_tmp = as.integer(unlist(str_split(mat_pop_nu$Ne[i], "-")))
  mat_pop_nu$n0[i] = vec_tmp[1]
  mat_pop_nu$nf[i] = vec_tmp[2]
}
for (rg_nu in seq_nu) {
  rg_nu
  df_tmp = mat_pop_nu[which(mat_pop_nu$Nu == rg_nu),]
  
  #grid.lines = 30
  #fit <- loess(df_tmp$mean.het.adm ~ df_tmp$Generation + as.numeric(df_tmp$ne_gen))
  #x.pred <- seq(min(df_tmp$Generation), max(df_tmp$Generation), length.out = grid.lines)
  #y.pred <- seq(min(as.numeric(df_tmp$ne_gen)),
  #              max(as.numeric(df_tmp$ne_gen)), length.out = grid.lines)
  #xy <- expand.grid( x = x.pred, y = y.pred)
  # z.pred <- matrix(predict(fit, newdata = xy), 
  #                nrow = grid.lines, ncol = grid.lines)
  
  scatter3D(x = df_tmp$Generation, y = df_tmp$ne_gen, z = df_tmp$mean.het.adm,
            bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_nu],
            theta = 210, phi = 20, type = "s",cex = 0.2, cex.lab = 0.6, cex.axis = 0.5,
            xlab = "Gen", ylab = "Ne", zlab = "Stat")
}

#surf = list(x = x.pred, y = y.pred, z = z.pred,facets = NA)
seq_extract = c("0.05", "0.25", "0.45")
df_extract = mat_pop_nu[which(mat_pop_nu$Nu == seq_extract),]
scatter3D(x = mat_pop_nu$Generation, y = mat_pop_nu$ne_gen,
          z = as.numeric(as.character(mat_pop_nu$Nu)),
          bty = "b2", pch = 16, ticktype = "detailed",
          theta = 210, phi = 50, type = "s", cex.lab = 0.6, cex.axis = 0.5,
          xlab = "Gen", ylab = "Ne", zlab = "Nu")

setwd("../new_methis_pop_increase_50000_snp_Nu_var/")
seq_nu_bis = sprintf("%.2f", c(0.01, 0.25, 0.49))
seq_ne_ini = c(100)
seq_ne_fin = c(200, 400, seq(1000, 10000, 1000))
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]


df_nu_reduce = data.frame()

for (nu in seq_nu_bis) {
  if(dir.exists(str_c("Nu", nu ))){
    dir_nu = str_c("Nu", nu, "/")
    setwd(dir_nu)
    
    df_tmp = data.frame.increase(seq_ne_ini, seq_combi)
    col_nu = as.character(rep(nu, nrow(df_tmp)))
    col_n0 = rep(NA, nrow(df_tmp))
    col_nf = rep(NA, nrow(df_tmp))
    
    if (nu == seq_nu_bis[1]) {
      df_nu_reduce = data.frame(df_tmp, nu = col_nu,
                                n0 = col_n0, nf = col_nf)
    }else{
      df_tmp = data.frame(df_tmp, nu = col_nu,
                          n0 = col_n0, nf = col_nf)
      df_nu_reduce = rbind(df_nu_reduce, df_tmp)
    }
    setwd("../")
  }
}

for (i in 1:nrow(df_nu_reduce)) {
  vec_tmp = as.integer(unlist(str_split(df_nu_reduce$Ne[i], "-")))
  df_nu_reduce$n0[i] = vec_tmp[1]
  df_nu_reduce$nf[i] = vec_tmp[2]
}
ne_gen = data.frame()
cpt = 1

for (nu in seq_nu_bis) {
  if(dir.exists(str_c("Nu", nu ))){
    dir_nu = str_c("Nu", nu, "/")
    setwd(dir_nu)
    for (ne_ini in seq_ne_ini) {
      if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
        dir_ne = str_c("Ne", ne_ini, "-XXX/")
        setwd(dir_ne)
        seq_combi = as.data.frame(outer(ne_ini, seq_ne_fin,
                                        FUN="paste", sep="-"))
        seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
        for (combi in seq_combi) {
          if (dir.exists(str_c("Ne", combi))) {
            dir_ne_bis = str_c("Ne", combi, "/simu_1/")
            setwd(dir_ne_bis)
            cpt = cpt + 1
            file_tmp = read.table("simu_1.par", header = TRUE)
            if (nu == seq_nu_bis[1] && ne_ini==seq_ne_ini[1] && combi == seq_combi[1]) {
              ne_gen = as.data.frame(file_tmp[,2])
            }else{
              ne_gen = rbind(ne_gen, file_tmp[,2])
            }
            
            setwd("../../")
          }
        }
        setwd("../")
      }
    }
    setwd("../")

  }
}
df_nu_reduce_gen_100 = df_nu_reduce[which(df_nu_reduce$Generation == 100),]
df_nu_reduce$log_mean = log(df_nu_reduce$mean.het.adm)
scatter3D(x = df_nu_reduce$Generation, y = df_nu_reduce$nf,
          z = df_nu_reduce$mean.het.adm, colvar = as.numeric(df_nu_reduce$nu),
          col = c("#1B9E77", "#D95F02", "#7570B3"),
          colkey = list(at = c(0.1, 0.25, 0.4),
                        addlines = TRUE, length = 1, width = 0.5,
                        labels = levels(as.factor(df_nu_reduce$nu))),
          ticktype = "detailed",bty = "g",pch = 16,
          theta = 115, phi = 0, type = "s", cex = 0.5, cex.lab = 0.6, cex.axis = 0.5,
          xlab = "generation", ylab = "Nf", zlab = "stat", clab = "U")

  # scatter3D(x = df_tmp$Generation, y = vec_tot, z = df_tmp$mean.het.adm,
  #           bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_nu],
  #           theta = 210, phi = 20, type = "s", cex.lab = 0.6, cex.axis = 0.5,
  #           xlab = "Gen", ylab = "Ne", zlab = "Stat")
seq_ne_ini3 = seq(100,100,0)
seq_multi3 = c(2,4,10,100)
seq_ne_fin3 = seq_multi3*seq_ne_ini3
seq_combi3 = as.data.frame(outer(seq_ne_ini3, seq_ne_fin3, FUN="paste", sep="-"))
seq_combi3 = as.data.frame.vector(unlist(seq_combi3))[[1]]

for (rg in c(1:length(seq_combi3))) {
  if (rg == 1) {
    mat_pop_nu3 = df_nu_reduce[which(df_nu_reduce$Ne == seq_combi3[rg]),]
  }else{
    tmp = df_nu_reduce[which(df_nu_reduce$Ne == seq_combi3[rg]),]
    mat_pop_nu3 = rbind(mat_pop_nu3, tmp)
  }
}
write_increase_plot = function(name_stat, mat, name_file, min_y, max_y){
  num_col = which(colnames(mat) == name_stat)
  p = plot_stat_gen(df = mat, gen = mat$Generation, 
                    stat = mat[,num_col], group_col = c(),
                    color_col = mat$Ne, line_t = mat$nu,
                    titre = str_c(name_stat," en fonction des générations\n selon différentes croissances de populations"),
                    legd = TRUE)
  
  p = improve_plot_bottle(p =p, vec_abline = c(), lab_col = "ne0-nef",
                       lab_line = "U", vec_linetype = c("dotted", "longdash", "solid"))

  png(filename = str_c("../../../Images/", name_file,".png"), width = 1000, height = 800)
  p = p+ylim(min_y, max_y)
  print(p)
  dev.off()
  ggsave(filename =  str_c("../../../Images/", name_file, ".eps"), plot = p)
}
write_increase_plot("mean.het.adm", mat_pop_nu3, "moyenne_heterozygotie/moy_htz_pop_inc_nu_var",0.01,0.062)
## Saving 7 x 5 in image
write_increase_plot("Fst.s1.adm", mat_pop_nu3, "Fsts1adm/fst_s1_adm_pop_inc_nu_var", 0, 0.5)
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 2 row(s) containing missing values (geom_path).
write_increase_plot("mean.F.adm", mat_pop_nu3, "Fadm/f_adm_pop_inc_nu_var", -0.03, 0.03)
## Warning: Removed 1 rows containing missing values (geom_point).
## Saving 7 x 5 in image
## Warning: Removed 1 rows containing missing values (geom_point).

###Populations croissantes avec Nu variable et fixé

print(difftime(Sys.time(), time.ini))
## Time difference of 10.71038 mins

Bottleneck

Nu fixé, alpha variable, N0 fixé, Nf fixé

setwd("../new_methis_bottleneck_50000_snp/")

seq_alpha = c(0.1, 0.5, 0.9)
seq_nu_red = c(0.01, 0.25, 0.49)
seq_bottle = seq(10, 90, 10)
seq_ne0 = c(1000)
seq_nef = c(1000)

seq_combi = as.data.frame(outer(seq_ne0, seq_nef, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]

for (alpha in seq_alpha) {
  for (nu in seq_nu_red) {
    for (bottle in seq_bottle) {
      change_dir = str_c("alpha", alpha, "/Nu", nu, "/bottle",bottle,"/")
      setwd(change_dir)
      
      mat_tmp = data.frame.increase(seq_ne0, seq_combi)
      col_alpha = rep(alpha, nrow(mat_tmp))
      col_nu = rep(nu, nrow(mat_tmp))
      col_bottle = rep(bottle, nrow(mat_tmp))
      col_bott_u = as.numeric(rep(bottle, nrow(mat_tmp)))/as.numeric(rep(nu, nrow(mat_tmp)))
      col_bott_u = ceiling(col_bott_u/1)*1
      col_alpha_u = as.numeric(rep(nu, nrow(mat_tmp)))/as.numeric(rep(alpha, nrow(mat_tmp)))
      col_alpha_u = ceiling(col_alpha_u/0.001)*0.001
      mat_tmp = data.frame(mat_tmp, alpha = col_alpha,
                           U = col_nu, time_botl = col_bottle,
                           bott_u = col_bott_u, alpha_u = col_alpha_u)
      
      if (alpha==seq_alpha[1] & nu==seq_nu_red[1] & bottle==seq_bottle[1]) {
        mat_bottle = mat_tmp
      }else{
        mat_bottle = rbind(mat_bottle, mat_tmp)
      }
      setwd("../../../")
    }
  }
}

mat_bottle$alpha = as.factor(mat_bottle$alpha)
mat_bottle$U = as.factor(mat_bottle$U)
mat_bottle$time_botl = as.factor(mat_bottle$time_botl)
mat_bottle$cpt = as.factor(mat_bottle$cpt)
mat_bottle$bott_u = as.factor(mat_bottle$bott_u)
mat_bottle$alpha_u = as.factor(mat_bottle$alpha_u)

mat_bottle_time_50 = mat_bottle[which(mat_bottle$time_botl == 50),]
mat_bottle_time_20 = mat_bottle[which(mat_bottle$time_botl == 20),]
mat_bottle_time_80 = mat_bottle[which(mat_bottle$time_botl == 80),]
boucle_plot_bottle = function(mat, vec_stat, name_stat, vec_bott){
  for (bott in vec_bott) {
    new_mat = mat[which(mat$time_botl == bott),]
    new_vec = vec_stat[which(mat$time_botl == bott)]
    
    p = plot_stat_gen(new_mat, new_mat$Generation, 
                      new_vec, c(),
                      new_mat$alpha, size_point = 0,
                      str_c(name_stat, " en fonction des générations\nbottleneck ", bott, " N0 1000 Nf 1000"),
                      TRUE,legd = TRUE, line_t = new_mat$U)
    
    p = improve_plot_bottle(p = p, vec_abline = c(1,bott+1), lab_col = "alpha",
                            lab_line = "U", vec_linetype = c("solid", "longdash", "dotted"))
    
    print(p)
  }
}
plot_without_point(mat_bottle, mat_bottle$Generation, 
              mat_bottle$mean.het.adm, mat_bottle$alpha,
              "Stat en fonction des générations\n selon différents bottleneck",
              legd = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Moyenne de l’hétérozygotie

boucle_plot_bottle(mat_bottle, mat_bottle$mean.het.adm, "Heterozygotie", c(20,50,80))

Fst

boucle_plot_bottle(mat_bottle, mat_bottle$Fst.s1.adm, "Fst s1 adm", c(20,50,80))

boucle_plot_bottle(mat_bottle, mat_bottle$Fst.s2.adm, "Fst s2 adm", c(20,50,80))

boucle_plot_bottle(mat_bottle, mat_bottle$mean.F.adm, "F mean", c(20,50,80))

mini_mat_bottle = rbind(mat_bottle_time_20, mat_bottle_time_50, mat_bottle_time_80)

p = plot_stat_gen(mat_bottle, mat_bottle$Generation, 
              mat_bottle$mean.het.adm, c(),
              mat_bottle$bott_u, line_t = mat_bottle$alpha,
              "Stat en fonction des générations\n selon différents bottleneck",TRUE,
              legd = TRUE)
p = p+  theme(legend.position="bottom", legend.box = "horizontal")
print(p)

p = plot_stat_gen(mini_mat_bottle, mini_mat_bottle$Generation, 
                mini_mat_bottle$mean.het.adm, c(),
                mini_mat_bottle$bott_u, line_t = mini_mat_bottle$alpha,
                "Stat en fonction des générations\n selon différents bottleneck",TRUE,
                legd = TRUE)
improve_plot_bottle(p =p, vec_abline = c(21, 51, 81), lab_col = "tpsbott/U",
                    lab_line = "alpha", vec_linetype = c("solid", "longdash", "dotted"))

write_bottle_plot = function(name_stat, mat, name_file, min_y, max_y){
  num_col = which(colnames(mat) == name_stat)
  p = plot_stat_gen(mat, mat$Generation, 
                    mat[,num_col], c(),
                    mat$alpha_u, line_t = mat$time_botl,
                    str_c(name_stat, " en fonction des générations\n selon différents bottleneck\n (Ne0 = Nef = 1000)"),TRUE,
                    legd = TRUE)
  p = improve_plot_bottle(p =p, vec_abline = c(21, 51, 81), lab_col = "U/alpha",
                          lab_line = "tpsbott", vec_linetype = c("dotted", "longdash", "solid"))

  png(filename = str_c("../../../Images/",name_file,".png"), width = 1000, height = 800)
  p = p+ylim(min_y, max_y)
  print(p)
  dev.off()
  ggsave(filename =  str_c("../../../Images/", name_file, ".eps"), plot = p)
}
p = plot_stat_gen(mini_mat_bottle, mini_mat_bottle$Generation, 
                  mini_mat_bottle$mean.het.adm, c(),
                  mini_mat_bottle$alpha_u, line_t = mini_mat_bottle$time_botl,
                  "Moyenne He en fonction des générations\n selon différents bottleneck\n (Ne0 = Nef = 1000)",TRUE,
                  legd = TRUE)
p = improve_plot_bottle(p =p, vec_abline = c(21, 51, 81), lab_col = "U/alpha",
                        lab_line = "tpsbott", vec_linetype = c("dotted", "longdash", "solid"))
print(p)

p = plot_stat_gen(mini_mat_bottle, mini_mat_bottle$Generation, 
                  mini_mat_bottle$Fst.s1.adm, c(),
                  mini_mat_bottle$alpha_u, line_t = mini_mat_bottle$time_botl,
                  "Fst s1 adm en fonction des générations\n selon différents bottleneck\n (Ne0 = Nef = 1000)",TRUE,
                  legd = TRUE)
p = improve_plot_bottle(p =p, vec_abline = c(21, 51, 81), lab_col = "U/alpha",
                        lab_line = "tpsbott", vec_linetype = c("dotted", "longdash", "solid"))
print(p)

p = plot_stat_gen(mini_mat_bottle, mini_mat_bottle$Generation, 
                  mini_mat_bottle$mean.adm.props, c(),
                  mini_mat_bottle$alpha_u, line_t = mini_mat_bottle$time_botl,
                  "Moyenne F en fonction des générations\n selon différents bottleneck\n (Ne0 = Nef = 1000)",TRUE,
                  legd = TRUE)
p = improve_plot_bottle(p =p, vec_abline = c(21, 51, 81), lab_col = "U/alpha",
                        lab_line = "tpsbott", vec_linetype = c("dotted", "longdash", "solid"))
print(p)

write_bottle_plot("mean.het.adm", mini_mat_bottle, "moyenne_heterozygotie/moy_htz_bottleneck",0.01,0.062)
## Saving 7 x 5 in image
write_bottle_plot("Fst.s1.adm", mini_mat_bottle, "Fsts1adm/fst_s1_adm_bottleneck", 0, 0.5)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Saving 7 x 5 in image
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 row(s) containing missing values (geom_path).
write_bottle_plot("mean.F.adm", mini_mat_bottle, "Fadm/f_adm_bottleneck", -0.03, 0.03)
## Saving 7 x 5 in image
vec_tmp = c()
vec_tmp[which(mat_bottle$alpha == 0.1)] = 2
vec_tmp[which(mat_bottle$alpha == 0.5)] = 3
vec_tmp[which(mat_bottle$alpha == 0.9)] = 4

scatter3D(x = mat_bottle$Generation, y = as.numeric(as.character(mat_bottle$time_botl)),
          z = mat_bottle$mean.het.adm, colvar = as.numeric(vec_tmp),
          ticktype = "detailed",bty = "b2",pch = 16,
          theta = 65, phi = 0, type = "s",cex = 0.3, cex.lab = 0.6, cex.axis = 0.5,
          xlab = "generation", ylab = "bottle", zlab = "stat",
          col = c("#1B9E77", "#D95F02", "#7570B3"),
          colkey = list(at = c(2.3, 3, 3.7),
                        addlines = TRUE, length = 1, width = 0.5,
                        labels = c("0.1", "0.5", "0.9")),
          clab = "alpha")

# new_mat_bottle = data.frame(mat_bottle_time_80, mean_norm = rep(NA, nrow(mat_bottle_time_80)),
#                             sd_norm = rep(NA, nrow(mat_bottle_time_80)))
# 
# for (line in c(seq(1, nrow(new_mat_bottle), 100), seq(11, nrow(new_mat_bottle), 10) ) ){
#   vec_props = c(new_mat_bottle$perc10.adm.props[line],
#                 new_mat_bottle$perc20.adm.props[line],
#                 new_mat_bottle$perc30.adm.props[line],
#                 new_mat_bottle$perc40.adm.props[line],
#                 new_mat_bottle$perc50.adm.props[line],
#                 new_mat_bottle$perc60.adm.props[line],
#                 new_mat_bottle$perc70.adm.props[line],
#                 new_mat_bottle$perc80.adm.props[line],
#                 new_mat_bottle$perc90.adm.props[line])
#   courbe_props = invisible(get.norm.par(p = seq(0.1, 0.9, 0.1), (vec_props)))
#   
#   if (!is.na(courbe_props) ){
#       cat("\014")
#       dev.off()
#   }
# 
#   #cat("\014")
#   new_mat_bottle$mean_norm[line] = courbe_props[1]
#   new_mat_bottle$sd_norm[line] = courbe_props[2]
# }
# setwd("../../../Images/")
# 
# nb_simu = nrow(new_mat_bottle)/101
# 
# png(file="example%03d.png", width=600, heigh=400)
# 
# for (gen in seq(10,101, 10) ) {
#   new_mat_tmp = new_mat_bottle[which(new_mat_bottle$Generation == gen),]
#   for (alpha in seq_alpha) {
#     new_mat_tmp = new_mat_tmp[which(new_mat_tmp$alpha == alpha),]
#     for (u in seq_nu_bis) {
#       new_mat_tmp = new_mat_tmp[which(new_mat_tmp$U == u),]
#       if (alpha == seq_alpha[1] && u == seq_nu_bis[1]) {
#         plot(function(x) dnorm(x,new_mat_tmp$mean_norm,new_mat_tmp$sd_norm),
#              0, 1.2,xlab="x",ylab="",main = gen, col ="red")
#       }else{
#         if (!identical(new_mat_tmp$mean_norm, numeric(0)) |
#             !identical(new_mat_tmp$sd_norm, numeric(0))) {
#           curve(function(x) dnorm(x,new_mat_tmp$mean_norm,new_mat_tmp$sd_norm),
#                 col="red",from=0,to=1.2)
#         }
#         
#       }
#     }
#   }
# }
# 
# dev.off()
# 
# system("convert -delay 50 *.png loi_norm.gif")
# 
# file.remove(list.files(pattern=".png"))
#fit.perc(p = seq(0.1, 0.9, 0.1), (vec_props))
setwd("../../../Images/")

mat_bottle_time_80_u_0.01 = mat_bottle_time_80[which(mat_bottle_time_80$U == 0.01),]
nb_simu = nrow(mat_bottle_time_80_u_0.01) / 101

png(file="example%03d.png", width=600, heigh=400)
#c(seq(1,10,1), seq(11,101,10))
for (line in  c(seq(1,10,1), seq(11,101,10))){
  for (alpha in seq_alpha) {
    mat_tmp_bott = mat_bottle_time_80_u_0.01[which(mat_bottle_time_80_u_0.01$alpha == alpha),]
    vec_x = seq(0,100,10)
    vec_y = c(mat_tmp_bott$perc0.adm.props[line],
              mat_tmp_bott$perc10.adm.props[line],
              mat_tmp_bott$perc20.adm.props[line],
              mat_tmp_bott$perc30.adm.props[line],
              mat_tmp_bott$perc40.adm.props[line],
              mat_tmp_bott$perc50.adm.props[line],
              mat_tmp_bott$perc60.adm.props[line],
              mat_tmp_bott$perc70.adm.props[line],
              mat_tmp_bott$perc80.adm.props[line],
              mat_tmp_bott$perc90.adm.props[line],
              mat_tmp_bott$perc100.adm.props[line])
    
    if (alpha == seq_alpha[1]) {
      hist(vec_y, breaks = vec_y, main = line-1, col = "#A5260A", xlim = c(-0.2,1.2))
      # box_tmp = data.frame(perc = vec_x, val = vec_y, alpha = rep(alpha, length(vec_x)))
      # plot(vec_y, vec_x, type = "b", col = "#A5260A", main = line-1,
      #      ylim = c(0,100), xlim =c(-0.5,1.2))
    }else{
      hist(vec_y, breaks = vec_y, add = TRUE, col = (as.numeric(as.character(mat_tmp_bott$alpha))*10+2))
      # box_col = data.frame(perc = vec_x, val = vec_y, alpha = rep(alpha, length(vec_x)))
      # box_tmp = rbind(box_tmp, box_col)
      # lines(vec_y, vec_x, type = "b", col = (as.numeric(as.character(mat_tmp_bott$alpha))*10+3))
    }
  }
  # boxplot(val~alpha, data = box_tmp, main = line-1, col = c("#FF866A", "#77B5FE","#87E990"))
  legend("topleft", title="Alpha",names(summary(as.factor(mat_tmp_bott$alpha))),
         fill = c("#A5260A", 7,11), cex=0.8)
  # p_box <- ggplot(box_tmp, aes(x = as.factor(alpha), y = val, group=alpha, color = as.factor(alpha))) + 
  #   geom_boxplot() + labs(title = line-1)
  # print(p_box)FALSE
}

dev.off()
## png 
##   2
system("convert -delay 120 *.png hist_bottle1.gif")

file.remove(list.files(pattern=".png"))
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE
#plot(vec_test_x, vec_test, type = "b")

###Bottleneck

print(difftime(Sys.time(), time.ini))
## Time difference of 11.2073 mins